//*********************************************************************** // k_neighbors.cpp // // this is an implementationof the K-nearest neighbors classification // algorithm. it accepts any number of attributes and measures the // neighborhood distance using the square root of the sum of the // attribute distances squared. // // INPUTS: (from disk file) // control.dat - control file - space delimited // k value & datafile name // // datafile.clas - classification set // attribute count - don't include the classification in // the count // data - space delimited // // datafile.test - test set // attribute count - no classification data // data - space delimited // // OUTPUTS: (to disk file) // datafile.out - space delimited // test instance // k-nearest classification neighbor instances // //*********************************************************************** // WARNING: none // //*********************************************************************** // IMPLEMENTATION NOTE: all files are in the executable working directory // //*********************************************************************** // created by: j. aleshunas // created on: 18 nov 04 // modified on: 23 nov 04 // // © 2004 John Aleshunas // //*********************************************************************** #include "k_neighbors.h" void main(void) { //initialize main variables string sFilename; string sOut_filename; unsigned uK_count; unsigned uIndex1, uIndex2, uIndex3, uIndex4; float fSum_of_squares; float fDistance; bool bNot_done; // the sets of data vectors Classification_set clClass_set; Test_set clTest_set; //read control file Read_control_file(uK_count, sFilename); //read classification datafile clClass_set.Read_data(sFilename); //read test datafile clTest_set.Read_data(sFilename); //create an empty neighborhood vector vector vclNeighborhood; // create a temporary neighbor instance Neighbor_instance clTemp_result; // initialize its attribute size clTemp_result.vfAttributes.resize(clClass_set.vclInstances[0].vfAttributes.size()); // modify the filename for the output filename sOut_filename = sFilename + ".out"; // declare an output stream ofstream strResults_out_stream; // open the stream to write the output plaintext strResults_out_stream.open(sOut_filename.c_str()); //loop until end of the test instance vector (for loop) for(uIndex1 = 0; uIndex1 < clTest_set.vclInstances.size(); uIndex1++){ //reinitialize the neighborhood vector vclNeighborhood.resize(0); //loop until end of the classification instance vector (for loop) for(uIndex2 = 0; uIndex2 < clClass_set.vclInstances.size(); uIndex2++){ // compute the distance (present_dist) from test instance to classification instance //initialize the sum-of-squares to zero fSum_of_squares = 0; //loop for each attribute in the instance (for loop) for(uIndex3 = 0; uIndex3 < clTest_set.vclInstances[uIndex1].vfAttributes.size(); uIndex3++){ // initialize the distance fDistance = 0; //calculate the attribute distance fDistance = clClass_set.vclInstances[uIndex2].vfAttributes[uIndex3] -clTest_set.vclInstances[uIndex1].vfAttributes[uIndex3]; //square the attribute distance fDistance = fDistance * fDistance; //add this squared result to the sum-of-squares fSum_of_squares = fSum_of_squares + fDistance; // save the classification instance attribute value clTemp_result.vfAttributes[uIndex3] = clClass_set.vclInstances[uIndex2].vfAttributes[uIndex3]; } //for (attribute loop) //the vector distance is the square root of the sum-of-squares fDistance = (float)sqrt(fSum_of_squares); // save the calculated distance clTemp_result.fNeighbor_distance = fDistance; // save the classification instance classification value clTemp_result.sClassification = clClass_set.vclInstances[uIndex2].sClassification; //set flag variable not_done to TRUE bNot_done = true; //if neighborhood vector is empty - add new neighbor if(0 == vclNeighborhood.size()){ //append the new entry on the back vclNeighborhood.push_back(clTemp_result); //set flag variable not_done to FALSE - to skip the loop bNot_done = false; }//end if // initialize the index variable for the neighborhood search uIndex3 = 0; //loop while not_done (insert neighbor loop) while(bNot_done){ //(for each saved neighbor in the neighborhood) // if the neighborhood is not full if(vclNeighborhood.size() < uK_count){ //add the new neighbor vclNeighborhood.push_back(clTemp_result); // sort the neighborhood for(uIndex4 = (vclNeighborhood.size() - 1); uIndex4 > 0; uIndex4--){ if(vclNeighborhood[uIndex4].fNeighbor_distance < vclNeighborhood[uIndex4 - 1].fNeighbor_distance){ swap(vclNeighborhood[uIndex4], vclNeighborhood[uIndex4 - 1]); } // if } // for //set flag variable not_done to FALSE - to stop loop bNot_done = false; } // if //if the neighborhood size equals the max value if(vclNeighborhood.size() == uK_count){ //if the computed distance is less than the current saved neighbor distance //then - add new neighbor if(clTemp_result.fNeighbor_distance < vclNeighborhood[uIndex3].fNeighbor_distance) { //append the new entry on the back vclNeighborhood.push_back(clTemp_result); // sort the neighborhood for(uIndex4 = (vclNeighborhood.size() - 1); uIndex4 > 0; uIndex4--){ if(vclNeighborhood[uIndex4].fNeighbor_distance < vclNeighborhood[uIndex4 - 1].fNeighbor_distance){ swap(vclNeighborhood[uIndex4], vclNeighborhood[uIndex4 - 1]); } // if } // for //then pop off the last entry vclNeighborhood.pop_back(); //set flag variable not_done to FALSE - to stop loop bNot_done = false; } // if else { //if the current neighbor is not the last neighbor //then set the current neighbor to the next neighbor uIndex3++; // exit the loop when the neighborhood is exhausted if(uIndex3 == vclNeighborhood.size()) bNot_done = false; } // else } // if }//while (insert neighbor loop) }//for (classification instance loop) // after finding all of the nearest neighbors //write the test vector to the output file for(uIndex4 = 0; uIndex4 < clTest_set.vclInstances[uIndex1].vfAttributes.size(); uIndex4++){ strResults_out_stream << clTest_set.vclInstances[uIndex1].vfAttributes[uIndex4]; strResults_out_stream << " "; } // for strResults_out_stream << endl; //loop for each member of the neighborhood (for loop) for(uIndex3 = 0; uIndex3 < vclNeighborhood.size(); uIndex3++){ //write the neighbor vector strResults_out_stream << vclNeighborhood[uIndex3].fNeighbor_distance; strResults_out_stream << " "; for(uIndex4 = 0; uIndex4 < vclNeighborhood[uIndex3].vfAttributes.size(); uIndex4++){ strResults_out_stream << vclNeighborhood[uIndex3].vfAttributes[uIndex4]; strResults_out_stream << " "; } // for strResults_out_stream << vclNeighborhood[uIndex3].sClassification; strResults_out_stream << endl; } // for //write two blank lines to the output file - makes it easier to read strResults_out_stream << endl << endl; }//for (test instance loop) //close the output file strResults_out_stream.close(); return; }// main //***********************************************************************